I previously processed the raw sequencing data, optimized the barcode clustering, quantified the pDNA data and normalized the cDNA data. In this script, I want to have a detailed look at the cDNA data from a general perspective.
How to make a good rendering table:
| column1 | column2 | column3 |
|---|---|---|
| 1 | 2 | 3 |
| a | b | c |
# Set the knitr chunk option `echo` to TRUE for all following chunks
knitr::opts_chunk$set(echo = TRUE)
# Timestamp taken when the script starts
StartTime <- Sys.time()
# 8-digit date tag (YYYYMMDD) built from the current local time
Date <- format(Sys.time(), "%Y%m%d")
# libraries:
library(RColorBrewer)
library(ggplot2)
library(dplyr)
library(maditr)
library(tibble)
library(pheatmap)
library(ggpubr)
library(ggbeeswarm)
library(ggforce)
library(viridis)
library(plyr)
library(cowplot)
library(gridExtra)

Functions used throughout this script.
SetFileName <- function(filename, initials) {
  # Build a dated file name of the form <initials><Date><filename>.
  #
  # Both arguments are captured unevaluated with substitute(), so they can be
  # given as string literals or as bare symbols. A variable passed here
  # contributes its *name*, not its value.
  # `Date` is not an argument; it is looked up outside the function (the
  # script defines it as an 8-digit date tag).
  #
  # Returns the pasted file name.
  name_part <- substitute(filename)
  prefix_part <- substitute(initials)
  paste0(prefix_part, Date, name_part)
}
cbind.fill <- function(...) {
  # Column-bind objects with differing numbers of rows, padding with NA.
  #
  # Args:
  #   ...: Objects that as.matrix() can coerce (a plain vector becomes a
  #        one-column matrix).
  #
  # Returns:
  #   A matrix with as many rows as the tallest input; shorter inputs are
  #   padded at the bottom with NA rows. NULL when called without arguments.
  pieces <- lapply(list(...), as.matrix)
  if (length(pieces) == 0L) {
    # Nothing to bind; also avoids the max() warning on an empty vector
    return(NULL)
  }
  # vapply() guarantees an integer vector whatever the inputs are
  n_rows <- max(vapply(pieces, nrow, integer(1)))
  padded <- lapply(pieces, function(x) {
    rbind(x, matrix(NA, n_rows - nrow(x), ncol(x)))
  })
  do.call(cbind, padded)
}
# Import processed bc counts from the preprocessing step
# Read the filtered reporter activity table (absolute, machine-specific path)
cDNA_df <- read.csv(
  "/DATA/usr/m.trauernicht/projects/tf_activity_reporter/data/SuRE_TF_1/results/mt20200715_reporter_activity_filt.csv",
  header = TRUE
)
# Store `background` as character
cDNA_df$background <- as.character(cDNA_df$background)
# Drop every row that contains at least one NA
cDNA_df <- na.omit(cDNA_df)
# Rename "Tcfcp2l1" to "Tfcp2l1" in both the TF labels and the reporter ids
cDNA_df$TF <- revalue(
  cDNA_df$TF,
  c("Tcfcp2l1" = "Tfcp2l1", "Tcfcp2l1_neg" = "Tfcp2l1_neg")
)
cDNA_df$reporter_id <- gsub("Tcfcp2l1", "Tfcp2l1", cDNA_df$reporter_id)
## `geom_smooth()` using formula 'y ~ x'
## Only ~10 of the 26 TFs in the library show promising activity at first glance
## Prepare dataframe
# Collect TF activity per condition, native enhancers only:
# keep the unique (TF, condition, tf_activity) combinations
is_native <- cDNA_df$native_enhancer == "Yes"
tf_activity_heatmap <- unique(
  select(cDNA_df[is_native, ], TF, condition, tf_activity)
)

# Reshape to wide format (one row per condition, one column per TF) and
# move `condition` from a column into the row names
tf_activity_heatmap <- dcast(
  tf_activity_heatmap,
  condition ~ TF,
  value.var = "tf_activity"
)
tf_activity_heatmap <- column_to_rownames(
  remove_rownames(tf_activity_heatmap),
  var = "condition"
)

# Sum the activity of each TF over all conditions
comb_activity <- data.frame(
  pos = colnames(tf_activity_heatmap),
  activity = colSums(tf_activity_heatmap)
)

# Bar plot of the summed activity per TF; bars are coloured by activity and
# all axis decorations and the legend are hidden
ggplot(comb_activity, aes(x = pos, y = activity)) +
  geom_bar(stat = "identity", aes(fill = activity)) +
  theme_classic() +
  theme(
    axis.title.y = element_blank(),
    axis.text.y = element_blank(),
    axis.ticks.y = element_blank(),
    axis.ticks.x = element_blank(),
    axis.text.x = element_blank(),
    axis.title.x = element_blank(),
    legend.position = "none"
  ) +
  scale_fill_distiller(name = "activity", palette = "Reds", direction = 1)
# Keeping the scale in the pheatmap function
# Fixed breaks from -3 to 3 in steps of 0.06: 101 break points, one more than
# the 100 colours requested below
myBreaks1 <- seq(-3, 3, 0.06)
# Heatmap of the condition x TF matrix, without row or column clustering
pheatmap(as.matrix(tf_activity_heatmap),
  color = colorRampPalette(rev(brewer.pal(n = 7, name = "RdBu")))(100),
  breaks = myBreaks1, border_color = "#565656",
  cellwidth = 10, cellheight = 10,
  # Spell out FALSE: `F` is an ordinary variable that can be reassigned
  cluster_rows = FALSE, cluster_cols = FALSE
)
### All of these heatmaps conclude that we have informative reporters for ~10 TFs, and that the TF reporter design matters for some but not all TFs
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## [1] "Run time: 2.466 mins"
## [1] "/DATA/usr/m.trauernicht/projects/tf_activity_reporter/code/SuRE_TF_1"
## [1] "Wed Jul 15 12:08:17 2020"
## R version 3.6.3 (2020-02-29)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 16.04.6 LTS
##
## Matrix products: default
## BLAS: /usr/lib/libblas/libblas.so.3.6.0
## LAPACK: /usr/lib/lapack/liblapack.so.3.6.0
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] gridExtra_2.3 cowplot_1.0.0 plyr_1.8.6 viridis_0.5.1
## [5] viridisLite_0.3.0 ggforce_0.3.1 ggbeeswarm_0.6.0 ggpubr_0.2.5
## [9] magrittr_1.5 pheatmap_1.0.12 tibble_3.0.1 maditr_0.6.3
## [13] dplyr_0.8.5 ggplot2_3.3.0 RColorBrewer_1.1-2
##
## loaded via a namespace (and not attached):
## [1] beeswarm_0.2.3 tidyselect_0.2.5 xfun_0.12 purrr_0.3.3
## [5] splines_3.6.3 lattice_0.20-38 colorspace_1.4-1 vctrs_0.2.4
## [9] htmltools_0.4.0 yaml_2.2.0 mgcv_1.8-31 rlang_0.4.5
## [13] pillar_1.4.3 glue_1.3.1 withr_2.1.2 tweenr_1.0.1
## [17] lifecycle_0.2.0 stringr_1.4.0 munsell_0.5.0 ggsignif_0.6.0
## [21] gtable_0.3.0 evaluate_0.14 labeling_0.3 knitr_1.28
## [25] vipor_0.4.5 Rcpp_1.0.3 scales_1.1.0 farver_2.0.1
## [29] digest_0.6.23 stringi_1.4.6 polyclip_1.10-0 grid_3.6.3
## [33] tools_3.6.3 crayon_1.3.4 pkgconfig_2.0.3 Matrix_1.2-18
## [37] ellipsis_0.3.0 MASS_7.3-51.5 data.table_1.12.8 assertthat_0.2.1
## [41] rmarkdown_2.0 R6_2.4.1 nlme_3.1-143 compiler_3.6.3